---
title: "Clean Exp Data"
output: rmarkdown::null_document
---

This script cleans the datasets from the student survey (May/June 2024) and the general population survey (Oct/Nov 2024). Results for the student survey are presented in the online appendix.


```{r}

library(pacman)

p_load(tidyverse, readxl, texreg, stringr, scales)


```


# Survey 1 - Student Survey

```{r}

# Load the raw student-survey export and keep completed responses only.
# `Finished` is compared against the string "TRUE"; if read_csv() parses the
# column as logical, R coerces it to character for the comparison, so the
# filter works either way.
survey1 <- read_csv("survey1_final.csv", show_col_types = FALSE) |>
  filter(Finished == "TRUE")

# Recode variables (student survey).
#
# Stage 1 collapses each outcome into a single column: coalesce() keeps the
# first non-missing value across every column whose name starts with the given
# prefix (presumably one copy per experimental condition -- confirm against the
# survey export). `.` is the magrittr placeholder for the piped data frame, so
# this block has to use %>% rather than the native pipe.
#
# Stage 2 builds indices and recodes from the columns created in stage 1.
survey1 <- survey1 %>%
  mutate(
    # item: "is a trustworthy person in the political context"
    trust1 = coalesce(!!!select(., starts_with("dv_mat_1"))),

    # item: "should stay involved in politics"
    stay_involved = coalesce(!!!select(., starts_with("dv_mat_2"))),

    # item: "would make good political decisions"
    trust2 = coalesce(!!!select(., starts_with("dv_mat_3"))),

    # item: "what happened would strongly affect your vote decision"
    impact_vote = coalesce(!!!select(., starts_with("dv_mat_4"))),

    # credibility check (raw answer; dichotomised in stage 2)
    credible = coalesce(!!!select(., starts_with("credible"))),

    # item: "if Angel(a) were to run for office"
    vote = coalesce(!!!select(., starts_with("vote_")))
  ) %>%
  mutate(
    # 1 when the answer is "Sí", 0 for any other answer, NA stays NA
    credible = ifelse(credible == "Sí", 1, 0),

    # trust index: mean of the two trust items (NA if either is missing)
    trust = (trust1 + trust2) / 2,

    # coerce the attitude items to numeric before averaging
    across(
      c(sexism_6, sexism_7, sexism_8, sexism_9, ideology_1, ideology_3),
      as.numeric
    ),

    # benevolent sexism index
    benevolent = (sexism_7 + sexism_9) / 2,

    # hostile sexism index
    hostile = (sexism_6 + sexism_8) / 2,

    # liberal index
    liberal = (ideology_1 + ideology_3) / 2,

    # respondent gender, translated to English labels; any other value -> NA
    resp_gender = case_match(
      resp_gender,
      "Hombre" ~ "Male",
      "Mujer" ~ "Female"
    ),

    # candidate gender treatment: empty string becomes NA
    tx_gender = case_when(
      gender == "" ~ NA_character_,
      .default = gender
    ),

    # scandal treatment: empty string becomes NA
    tx_scandal = case_when(
      scandal == "" ~ NA_character_,
      .default = scandal
    )
  )

# Post-processing of the outcome variables, done as one pipeline:
#   1. drop the raw dv_mat_* columns (already collapsed into single outcomes),
#   2. reverse-code impact_vote so that low impact is high, and turn vote into
#      a 0/1 numeric ("No" -> 0, "Sí" -> 1, anything else -> NA),
#   3. rescale the continuous DVs to the 0-1 interval. scales::rescale() uses
#      each column's observed min/max by default.
survey1 <- survey1 |>
  select(-starts_with("dv_mat")) |>
  mutate(
    impact_vote = 7 - impact_vote,
    vote = case_match(
      vote,
      "No" ~ 0,
      "Sí" ~ 1
    )
  ) |>
  mutate(
    across(c(impact_vote, stay_involved, trust), scales::rescale)
  )

# Parse the start/end timestamps (day-month-year hour:minute, via lubridate's
# dmy_hm) and derive date-only versions of each.
survey1 <- survey1 |>
  mutate(
    across(c(StartDate, EndDate), dmy_hm),
    StartDay = as.Date(StartDate),
    EndDay = as.Date(EndDate)
  )

# Keep respondents with at least one of the three DVs (trust, vote,
# stay_involved) observed, then keep only the analysis columns.
survey1 <- survey1 |>
  filter(!is.na(trust) | !is.na(vote) | !is.na(stay_involved)) |>
  select(
    StartDay, EndDay, tx_scandal, tx_gender, trust, trust2, trust1,
    stay_involved, vote, impact_vote, benevolent, hostile, liberal,
    resp_gender, credible
  )

# NOTE(review): save() writes R's RData format regardless of the ".rds"
# extension, so this file must be read back with load(), not readRDS().
# Left unchanged because downstream scripts may rely on load(); confirm
# before switching to saveRDS().
save(survey1, file = "survey1_clean.rds")



```


# Survey 2 - General Population


```{r}
# Load the raw general-population export and keep completed responses only.
# `Finished` is compared against the string "TRUE"; a logical column is
# coerced to character for the comparison, so the filter works either way.
survey2 <- read_csv("survey2_final.csv", show_col_types = FALSE) |>
  filter(Finished == "TRUE")


```


```{r}

# Completed interviews that are missing `sel`; kept as a separate object so
# the affected respondent ids (`owid`) can be inspected.
missing_sel <- survey2 |>
  filter(srvy_status == "complete", is.na(sel)) |>
  select(owid, sel)

# Per the original note, every missing SEL case is level "C"; it is filled in
# here with the value 3 (presumably the numeric code for "C" -- confirm against
# the codebook). All other rows keep their existing `sel`.
survey2 <- survey2 |>
  mutate(
    sel = if_else(srvy_status == "complete" & is.na(sel), 3, sel)
  )


```


```{r}



# Recode variables (general population survey).
#
# Each outcome is collapsed into a single column: coalesce() keeps the first
# non-missing value across every column whose name starts with the given
# prefix (presumably one copy per experimental condition -- confirm against the
# survey export). `.` is the magrittr placeholder for the piped data frame, so
# this block has to use %>% rather than the native pipe.
survey2 <- survey2 %>%
  mutate(
    # item: "is a trustworthy person in the political context"
    trust1 = as.numeric(coalesce(!!!select(., starts_with("dv_mat_1")))),

    # item: "should stay involved in politics"
    stay_involved = coalesce(!!!select(., starts_with("dv_mat_2"))),

    # item: "would make good political decisions"
    # Uses the `.` placeholder like every other outcome (previously referenced
    # `survey2` by name, which is the same data). The trailing NA_real_ is a
    # final fallback that leaves the values unchanged.
    trust2 = as.numeric(
      coalesce(!!!select(., starts_with("dv_mat_3")), NA_real_)
    ),

    # item: "what happened would strongly affect your vote decision"
    impact_vote = coalesce(!!!select(., starts_with("dv_mat_4"))),

    # credibility check: 1 when the answer is "Sí", 0 for any other answer
    credible = coalesce(!!!select(., starts_with("credible"))),
    credible = ifelse(credible == "Sí", 1, 0),

    # item: "if Angel(a) were to run for office"
    vote = coalesce(!!!select(., starts_with("vote_"))),

    # trust index: mean of the two trust items (NA if either is missing)
    trust = (trust1 + trust2) / 2,

    # benevolent sexism index
    benevolent_6 = as.numeric(benevolent_6),
    benevolent_7 = as.numeric(benevolent_7),
    benevolent = (benevolent_6 + benevolent_7) / 2,

    # hostile sexism index
    hostile_6 = as.numeric(hostile_6),
    hostile_12 = as.numeric(hostile_12),
    hostile = (hostile_6 + hostile_12) / 2,

    # liberal index: mean of the four ideology items.
    # NOTE(review): item 4 is reverse-coded as 7 - x; if the response scale
    # runs 1-7 rather than 0-7 or 1-6, the reversal should be 8 - x -- confirm
    # against the questionnaire.
    ideology_1 = as.numeric(ideology_1_3),
    ideology_2 = as.numeric(ideology_1_4),
    ideology_3 = as.numeric(ideology_2_1),
    ideology_4 = 7 - as.numeric(ideology_2_2),
    # Fix: the sum previously counted ideology_3 twice and omitted ideology_2.
    liberal = (ideology_1 + ideology_2 + ideology_3 + ideology_4) / 4,

    # respondent gender, translated to English labels; any other value -> NA
    resp_gender = case_match(
      resp_gender,
      "Hombre" ~ "Male",
      "Mujer" ~ "Female"
    ),

    # candidate gender treatment: empty string becomes NA
    tx_gender = case_when(
      gender == "" ~ NA_character_,
      TRUE ~ gender
    ),

    # left-right ideology (copied from the raw item)
    left_right = left_right_4,

    # education, collapsed to three levels; unmatched values become NA
    edu = case_match(
      nivel,
      c("Educación primaria", "No he completado ninguna educación formal") ~ "Primary/None",
      "Educación secundaria" ~ "Secondary",
      c("Posgrado o equivalente", "Licenciatura o equivalente") ~ "Post-Secondary"
    ),

    # scandal treatment: empty string becomes NA
    tx_scandal = case_when(
      scandal == "" ~ NA_character_,
      TRUE ~ scandal
    ),

    # sexual stereotype item (copied from the raw item)
    initiate_sex = stereotypes_7,

    # state, stored as a factor
    state = factor(`__js_RandomState`)
  )
 

# Post-processing of the outcome variables, done as one pipeline:
#   1. drop the raw dv_mat_* columns (already collapsed into single outcomes),
#   2. reverse-code impact_vote so that low impact is high, and turn vote into
#      a 0/1 numeric ("No" -> 0, "Sí" -> 1, anything else -> NA),
#   3. rescale the continuous DVs to the 0-1 interval. scales::rescale() uses
#      each column's observed min/max by default.
survey2 <- survey2 |>
  select(-starts_with("dv_mat")) |>
  mutate(
    impact_vote = 7 - impact_vote,
    vote = case_match(
      vote,
      "No" ~ 0,
      "Sí" ~ 1
    )
  ) |>
  mutate(
    across(c(impact_vote, stay_involved, trust), scales::rescale)
  )

# Parse the start/end timestamps (day-month-year hour:minute, via lubridate's
# dmy_hm) and derive date-only versions of each.
survey2 <- survey2 |>
  mutate(
    across(c(StartDate, EndDate), dmy_hm),
    StartDay = as.Date(StartDate),
    EndDay = as.Date(EndDate)
  )


# Keep respondents with at least one of the three DVs (trust, vote,
# stay_involved) observed, then keep only the analysis columns.
survey2 <- survey2 |>
  filter(!is.na(trust) | !is.na(vote) | !is.na(stay_involved)) |>
  select(
    StartDay, EndDay, tx_scandal, tx_gender, trust, trust1, trust2, vote,
    stay_involved, impact_vote, benevolent, hostile, liberal, left_right,
    edu, sel, initiate_sex, credible, resp_gender, state
  )

# NOTE(review): save() writes R's RData format regardless of the ".rds"
# extension, so this file must be read back with load(), not readRDS().
# Left unchanged because downstream scripts may rely on load(); confirm
# before switching to saveRDS().
save(survey2, file = "survey2_clean.rds")


```

